function mom = build_moments(data,col_long,fp,dummy_moments_data)

% Build a vector of moments, taking as input either real or simulated data.
% Note: dummy_moments_data is equal to 1 if 'data' is real data (with 
% N*M rows), and 0 if it is simulated data (with N*M*R rows).

id_vec = unique(data(:,col_long.id)); % this can be larger than fp.N if we use simulated data
if length(id_vec) < fp.N % this means we're using bootstrapped data (where replacement usually results in fewer than fp.N unique id numbers, which needs to be corrected)
    id_vec = fp.rand_order; % this is a vector passed along from the calling function above
end
% disp(['Calculating moments for ',num2str(length(id_vec)),' observations.'])

if length(id_vec) < fp.N
    disp('ERROR in moment function!');
    return;
end

%% Part 1: Mean and StDev of time allocation and wages by age

%pre-define the matrix for Part 1
moments_1 = nan(4,17);

% define child age categories: 3-5, 6-8, 9-12, 13-16
beg_age = [3 6 9 13];
end_age = [5 8 12 16];

% define mom_age thresholds 
% Note: these areloosely based on quartiles when wages or hours are non-missing
% use these for wages, which are more likely dependent on parents' age than child age
beg_age_mom = [18 33 38 43];
end_age_mom = [32 37 42 65];

% define dad_age thresholds
beg_age_dad = [18 33 38 43];
end_age_dad = [32 37 42 65];

for t = 1:4
    % select the rows we need for this age group
    row_ind = (data(:,col_long.age) >= beg_age(t) & data(:,col_long.age) <= end_age(t));
    row_ind_mom = (data(:,col_long.mom_age) >= beg_age_mom(t) & data(:,col_long.mom_age) <= end_age_mom(t));
    row_ind_dad = (data(:,col_long.dad_age) >= beg_age_dad(t) & data(:,col_long.dad_age) <= end_age_dad(t));
    
    %a) mean and std hours of time allocations (by child age)
    mom.mom_hours = 1; % mother labor supply conditional on working
    temp = (row_ind == 1 & data(:,col_long.mom_hours) > 0); % conditional on supplying positive hours
    moments_1(t,mom.mom_hours) = nanmean(data(temp,col_long.mom_hours));
    moments_1(t,mom.mom_hours+1) = nanstd(data(temp,col_long.mom_hours));
    
    mom.dad_hours = 3; % father labor supply conditional on working
    temp = (row_ind == 1 & data(:,col_long.dad_hours) > 0); % conditional on supplying positive hours
    moments_1(t,mom.dad_hours) = nanmean(data(temp,col_long.dad_hours));
    moments_1(t,mom.dad_hours+1) = nanstd(data(temp,col_long.dad_hours));
    
    mom.tau1 = 5; % mother time
    moments_1(t,mom.tau1) = nanmean(data( row_ind,col_long.tau1));
    moments_1(t,mom.tau1+1) = nanstd(data( row_ind,col_long.tau1));
    
    mom.tau2 = 7; % father time
    moments_1(t,mom.tau2) = nanmean(data( row_ind,col_long.tau2));
    moments_1(t,mom.tau2+1) = nanstd(data( row_ind,col_long.tau2));
    
    mom.tau_c = 9; % child self-investment time
    moments_1(t,mom.tau_c) = nanmean(data( row_ind,col_long.tau_c));
    moments_1(t,mom.tau_c+1) = nanstd(data( row_ind,col_long.tau_c));
    
    %b) moments by parental age
    mom.mom_wage = 11; % mother hourly wage
    moments_1(t,mom.mom_wage) = nanmean(data(row_ind_mom,col_long.mom_wage));
    moments_1(t,mom.mom_wage+1) = nanstd(data(row_ind_mom,col_long.mom_wage));
    
    mom.dad_wage = 13; % father hourly wage
    moments_1(t,mom.dad_wage) = nanmean(data(row_ind_dad,col_long.dad_wage));
    moments_1(t,mom.dad_wage+1) = nanstd(data(row_ind_dad,col_long.dad_wage));
    
    %c) employment rates (again by child age group)
    %marginal prob mom employed
    mom.mom_employ = 15; % mother LFP
    temp1 = nansum(data( row_ind,col_long.mom_hours) > 0);
    temp2 = nansum((data( row_ind,col_long.mom_hours)) == 0);
    moments_1(t,mom.mom_employ) = temp1./(temp1 + temp2);
    
    %marginal prob dad employed
    mom.dad_employ = 16; % father LFP
    temp1 = nansum(data( row_ind,col_long.dad_hours) > 0);
    temp2 = nansum((data( row_ind,col_long.dad_hours)) == 0);
    moments_1(t,mom.dad_employ) = temp1./(temp1 + temp2);
    
    %prob both employed
    mom.both_employ = 17; % joint LFP
    temp1 = nansum( data( row_ind,col_long.dad_hours) > 0 & data(row_ind,col_long.mom_hours) > 0 );
    temp2 = nansum( data( row_ind,col_long.dad_hours) == 0 | data(row_ind,col_long.mom_hours) == 0 );
    moments_1(t,mom.both_employ) = temp1./(temp1 + temp2);
end

moments_1 = moments_1(:); % vectorize, make 4x17 into a 68x1

% now save them separately
mom.moments_1 = moments_1;
% save the indices for these moments (convenient to set weights later)
mom.index_moments_1 = 1:length(moments_1);
moments_all = [moments_1];

%% Part 2: Average LW score by child age

agevec = fp.min_age:1:fp.max_age; % this is 3:16, so 14 moments
moments_2 = nan*ones(size(agevec,2),1);

% average test score by age (3 to 16)
for i = 1:size(agevec,2)
    age_i = agevec(i);
    rows_i = data(:,col_long.age)==age_i;
    moments_2(i,1) = nanmean(data(rows_i,col_long.lw_raw));
end

mom.moments_2 = moments_2;
mom.index_moments_2 = length(moments_all) + [1:length(moments_2)];
moments_all = [moments_all; moments_2];

%% Part 3: Parental wages conditional on parental age and education

% 1)dad education bins
dad_educ = data(:,col_long.dad_educ);
index_dad_HS = (dad_educ == 12);                    % High School graduate
index_dad_SC = (dad_educ >= 13 & dad_educ <= 15);   % Some College
index_dad_FC = (dad_educ >= 16);                    % Finished College (or more)

% 2) mom education bins
mom_educ = data(:,col_long.mom_educ);
index_mom_HS = (mom_educ == 12);                    % High School graduate
index_mom_SC = (mom_educ >= 13 & mom_educ <= 15);   % Some College
index_mom_FC = (mom_educ >= 16);                    % Finished College (or more)

% 3) dad age bins
index_old_dad = data(:,col_long.dad_age) >= 40;
index_old_mom = data(:,col_long.mom_age) >= 40;
index_young_dad = data(:,col_long.dad_age) < 40;
index_young_mom = data(:,col_long.mom_age) < 40;

moments_3 = NaN(15,1);

% Define the 6 (educ,age) bins for each parent:
tmp1 = (index_mom_HS == 1 & index_young_mom == 1);
tmp2 = (index_mom_HS == 1 & index_old_mom == 1);
tmp3 = (index_mom_SC == 1 & index_young_mom == 1);
tmp4 = (index_mom_SC == 1 & index_old_mom == 1);
tmp5 = (index_mom_FC == 1 & index_young_mom == 1);
tmp6 = (index_mom_FC == 1 & index_old_mom == 1);
tmp7 = (index_dad_HS == 1 & index_young_dad == 1);
tmp8 = (index_dad_HS == 1 & index_old_dad == 1);
tmp9 = (index_dad_SC == 1 & index_young_dad == 1);
tmp10 = (index_dad_SC == 1 & index_old_dad == 1);
tmp11 = (index_dad_FC == 1 & index_young_dad == 1);
tmp12 = (index_dad_FC == 1 & index_old_dad == 1);

% conditional means for mothers wages
moments_3(1) = nanmean(data(tmp1,col_long.mom_wage));
moments_3(2) = nanmean(data(tmp2,col_long.mom_wage));
moments_3(3) = nanmean(data(tmp3,col_long.mom_wage));
moments_3(4) = nanmean(data(tmp4,col_long.mom_wage));
moments_3(5) = nanmean(data(tmp5,col_long.mom_wage));
moments_3(6) = nanmean(data(tmp6,col_long.mom_wage));

% conditional means for fathers wages
moments_3(7) = nanmean(data(tmp7,col_long.dad_wage));
moments_3(8) = nanmean(data(tmp8,col_long.dad_wage));
moments_3(9) = nanmean(data(tmp9,col_long.dad_wage));
moments_3(10) = nanmean(data(tmp10,col_long.dad_wage));
moments_3(11) = nanmean(data(tmp11,col_long.dad_wage));
moments_3(12) = nanmean(data(tmp12,col_long.dad_wage));

% standard deviations (unconditional)
moments_3(13) = nanstd(data(:,col_long.mom_wage));
moments_3(14) = nanstd(data(:,col_long.dad_wage));

% correlation (unconditional)
temp1 = data(:,col_long.mom_wage);
temp2 = data(:,col_long.dad_wage);
temp = nancov(temp1,temp2,1);
moments_3(15) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

mom.moments_3 = moments_3;
mom.index_moments_3 = length(moments_all) + [1:length(moments_3)];
moments_all = [moments_all; moments_3];

%% Part 4: Moments that capture the (input,output) process

index97 = (data(:,col_long.age) == data(:,col_long.child_age97)); % 247 positive entries in the data
index02 = (data(:,col_long.age) == data(:,col_long.child_age97)+5); % 247 positive entries in the data
index07 = (data(:,col_long.age) == data(:,col_long.child_age97)+10); % only 129 positive entries in the data

age_97 = data(index97,col_long.age); % child ages in 1997: 247 by 1
age_02 = data(index02,col_long.age); % child ages in 2002: 247 by 1
age_07 = data(index07,col_long.age); % child ages in 2007: 129 by 1
age_all = [age_97; age_02; age_07]; % stacked vector: 623 valid CDS observations (i.e. 247 kids which are observed 2 or 3 times)
temp = [index07(6:end); 0; 0; 0; 0; 0]; % all the ones are shifted 5 rows "higher" now, corresponding to year 2002
age_02_temp = data(temp==1,col_long.age); % age in 2002 for those kids whom we also observe in 07
age_t = [age_97; age_02_temp]; % stacked vector: 247+129 = 376 kids who are observed 3 times (i.e. we can calculate their changes twice)

% define child age bins for the "contemporaneous" moments
child_bin1 = (age_all >= 3 & age_all <= 16);
child_bin2 = (age_all >= 3 & age_all <= 7);
child_bin3 = (age_all >= 8 & age_all <= 11);
child_bin4 = (age_all >= 12 & age_all <= 16);

% define child age bins for the "lagged" moments
child_bin1_t = (age_t >= 3 & age_t <= 11); % these kids will be aged 8-16 at time t+5, which is either 2002 or 2007
child_bin2_t = (age_t >= 3 & age_t <= 7); % these kids will be aged 8-12 at time t+5
child_bin3_t = (age_t >= 8 & age_t <= 11); % these kids will be aged 13-16 at time t+5

% define Letter Word score data (contemporaneous and lagged, levels and changes)
LW_97 = data(index97,col_long.lw_raw); % LW in 1997 (247 obs)
LW_02 = data(index02,col_long.lw_raw); % LW in 2002 (247 obs)
LW_07 = data(index07,col_long.lw_raw); % LW in 2007 (129 obs)

LW_all = [LW_97; LW_02; LW_07]; % all observed LW scores
LW_bin1 = LW_all(child_bin1); % LW score of all kids (either in 97, 02 or 07)
LW_bin2 = LW_all(child_bin2); % LW score of all kids aged 3-7
LW_bin3 = LW_all(child_bin3); % LW score of all kids aged 8-11
LW_bin4 = LW_all(child_bin4); % LW score of all kids aged 12-16

% now define LW at time t, but only for those kids for whom we have a LW score at time t+5
LW_tp5 = [LW_02; LW_07]; % LW scores at time t+5 (376 obs)
% define the vector of LW scores in 2002 for kids for which we also have LW in 2007
temp = [index07(6:end); 0; 0; 0; 0; 0]; % all the ones are 5 rows "higher" now, corresponding to year 2002
LW_02_temp = data(temp==1,col_long.lw_raw);
LW_t = [LW_97; LW_02_temp]; % LW scores at time t for kids we also observe at t+5 (376 obs)

dLW_all = LW_tp5 - LW_t; % absolute changes in LW between time t and t+5
dLW_bin1 = dLW_all(child_bin1_t); % change in LW scores for kids aged 3-11 at time t (97 or 02)
dLW_bin2 = dLW_all(child_bin2_t); % change in LW scores for kids aged 3-7 at time t (97 or 02)
dLW_bin3 = dLW_all(child_bin3_t); % change in LW scores for kids aged 8-11 at time t (97 or 02)

LW_t_bin1 = LW_t(child_bin1_t);
LW_t_bin2 = LW_t(child_bin2_t);
LW_t_bin3 = LW_t(child_bin3_t);
LW_tp5_bin1 = LW_tp5(child_bin1_t); % LW score in t+5 for kids aged 3-5 at time t
LW_tp5_bin2 = LW_tp5(child_bin2_t); % LW score in t+5 for kids aged 6-8 at time t
LW_tp5_bin3 = LW_tp5(child_bin3_t); % LW score in t+5 for kids aged 9-11 at time t

%% Part 4.a) Moments of LW score (levels and changes) for various child age bins

moments_LW = NaN(14,1);
% averages of LW levels by age group
moments_LW(1) = nanmean(LW_bin1);
moments_LW(2) = nanmean(LW_bin2);
moments_LW(3) = nanmean(LW_bin3);
moments_LW(4) = nanmean(LW_bin4);

% stdevs of LW levels by age group
moments_LW(5) = nanstd(LW_bin1);
moments_LW(6) = nanstd(LW_bin2);
moments_LW(7) = nanstd(LW_bin3);
moments_LW(8) = nanstd(LW_bin4);

% averages of changes between t and t+5
moments_LW(9) = nanmean(dLW_bin1);
moments_LW(10) = nanmean(dLW_bin2);
moments_LW(11) = nanmean(dLW_bin3);

% correlation between t and t+5, agebin1
temp1 = LW_t_bin1;
temp2 = LW_tp5_bin1;
temp = nancov(temp1,temp2,1);
moments_LW(12) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% correlation between t and t+5, agebin2
temp1 = LW_t_bin2;
temp2 = LW_tp5_bin2;
temp = nancov(temp1,temp2,1);
moments_LW(13) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% correlation between t and t+5, agebin3
temp1 = LW_t_bin3;
temp2 = LW_tp5_bin3;
temp = nancov(temp1,temp2,1);
moments_LW(14) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

mom.moments_LW = moments_LW;
mom.index_moments_LW = length(moments_all) + [1:length(moments_LW)];
moments_all = [moments_all; moments_LW];

%% 4.b) Moments that relate mother's active time to LW raw score

% define tau1 data (contemporaneous and lagged)
tau1_97 = data(index97,col_long.tau1);
tau1_02 = data(index02,col_long.tau1);
tau1_07 = data(index07,col_long.tau1);
tau1_all = [tau1_97; tau1_02; tau1_07];
tau1_bin1 = tau1_all(child_bin1);

temp = [index07(6:end); 0; 0; 0; 0; 0]; % all the ones are 5 rows "higher" now, corresponding to year 2002
tau1_02_temp = data(temp==1,col_long.tau1);
tau1_t = [tau1_97; tau1_02_temp];
tau1_t_bin1 = tau1_t(child_bin1_t);
tau1_t_bin2 = tau1_t(child_bin2_t);
tau1_t_bin3 = tau1_t(child_bin3_t);

moments_tau1 = NaN(4,1);

% 1. contemporaneous correlation of tau1 and LW for child age bin 1 (all ages)
temp1 = LW_bin1;
temp2 = tau1_bin1;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau1(1) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 2. correlation of tau1 and dLW (absolute change) for child age bin 1
temp1 = dLW_bin1;
temp2 = tau1_t_bin1;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau1(2) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 3. correlation of tau1 and dLW (absolute change) for child age bin 2
temp1 = dLW_bin2;
temp2 = tau1_t_bin2;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau1(3) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 4. correlation of tau1 and dLW (absolute change) for child age bin 3
temp1 = dLW_bin3;
temp2 = tau1_t_bin3;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau1(4) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

mom.moments_tau1 = moments_tau1;
mom.index_moments_tau1 = length(moments_all) + [1:length(moments_tau1)];
moments_all = [moments_all; moments_tau1];

%% 4.c) Moments that relate father's active time to LW raw score

% define tau2 data (contemporaneous and lagged)
tau2_97 = data(index97,col_long.tau2);
tau2_02 = data(index02,col_long.tau2);
tau2_07 = data(index07,col_long.tau2);
tau2_all = [tau2_97; tau2_02; tau2_07];
tau2_bin1 = tau2_all(child_bin1);

temp = [index07(6:end); 0; 0; 0; 0; 0]; % all the ones are 5 rows "higher" now, corresponding to year 2002
tau2_02_temp = data(temp==1,col_long.tau2);
tau2_t = [tau2_97; tau2_02_temp];
tau2_t_bin1 = tau2_t(child_bin1_t);
tau2_t_bin2 = tau2_t(child_bin2_t);
tau2_t_bin3 = tau2_t(child_bin3_t);

moments_tau2 = NaN(4,1);

% 1. contemporaneous correlation of tau2 and LW for child age bin 1 (all ages)
temp1 = LW_bin1;
temp2 = tau2_bin1;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau2(1) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 2. correlation of tau2 and dLW (absolute change) for child age bin 1
temp1 = dLW_bin1;
temp2 = tau2_t_bin1;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau2(2) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 3. correlation of tau2 and dLW (absolute change) for child age bin 2
temp1 = dLW_bin2;
temp2 = tau2_t_bin2;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau2(3) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 4. correlation of tau2 and dLW (absolute change) for child age bin 3
temp1 = dLW_bin3;
temp2 = tau2_t_bin3;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tau2(4) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

mom.moments_tau2 = moments_tau2;
mom.index_moments_tau2 = length(moments_all) + [1:length(moments_tau2)];
moments_all = [moments_all; moments_tau2];

%% 4.d) Moments that relate child self-investment time to LW raw score

% define tau_c data (contemporaneous and lagged)
tauc_97 = data(index97,col_long.tau_c);
tauc_02 = data(index02,col_long.tau_c);
tauc_07 = data(index07,col_long.tau_c);
tauc_all = [tauc_97; tauc_02; tauc_07];
tauc_bin1 = tauc_all(child_bin1);
tauc_bin2 = tauc_all(child_bin2);
tauc_bin3 = tauc_all(child_bin3);
tauc_bin4 = tauc_all(child_bin4);

temp = [index07(6:end); 0; 0; 0; 0; 0]; % all the ones are 5 rows "higher" now, corresponding to year 2002
tauc_02_temp = data(temp==1,col_long.tau_c);
tauc_t = [tauc_97; tauc_02_temp];
tauc_t_bin1 = tauc_t(child_bin1_t);
tauc_t_bin2 = tauc_t(child_bin2_t);
tauc_t_bin3 = tauc_t(child_bin3_t);

moments_tauc = NaN(4,1);

% 1. contemporaneous correlation of tauc and LW for child age bin 1 (all ages)
temp1 = LW_bin1;
temp2 = tauc_bin1;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tauc(1) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 2. correlation of tauc and dLW (absolute change) for child age bin 1
temp1 = dLW_bin1;
temp2 = tauc_t_bin1;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tauc(2) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 3. correlation of tauc and dLW (absolute change) for child age bin 2
temp1 = dLW_bin2;
temp2 = tauc_t_bin2;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tauc(3) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

% 4. correlation of tauc and dLW (absolute change) for child age bin 3
temp1 = dLW_bin3;
temp2 = tauc_t_bin3;
tmprows = isnan(temp1) == 0 & isnan(temp2) == 0;
temp = nancov(temp1(tmprows), temp2(tmprows),1);
moments_tauc(4) = temp(1,2)./(nanstd(temp1(tmprows),1) .* nanstd(temp2(tmprows),1));

mom.moments_tauc = moments_tauc;
mom.index_moments_tauc = length(moments_all) + [1:length(moments_tauc)];
moments_all = [moments_all; moments_tauc];

%% 4.e) Moments that correlate parental education, test scores and parental time inputs

moments_educ = NaN(12,1);

% First, only for ages 3-8:
row_ind = (data(:,col_long.age) >= 3 & data(:,col_long.age) <= 8);

% 1) mom_educ, LW
temp1 = data(row_ind,col_long.mom_educ);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_educ(1,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 2) mom_educ, tau1
temp1 = data(row_ind,col_long.mom_educ);
temp2 = data(row_ind,col_long.tau1);
temp = nancov(temp1,temp2,1);
moments_educ(2,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 3) dad_educ, LW
temp1 = data(row_ind,col_long.dad_educ);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_educ(3,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 4) dad_educ, tau2
temp1 = data(row_ind,col_long.dad_educ);
temp2 = data(row_ind,col_long.tau2);
temp = nancov(temp1,temp2,1);
moments_educ(4,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% Second, only for ages 9-12:
row_ind = (data(:,col_long.age) >= 9 & data(:,col_long.age) <= 12);

% 1) mom_educ, LW
temp1 = data(row_ind,col_long.mom_educ);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_educ(5,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 2) mom_educ, tau1
temp1 = data(row_ind,col_long.mom_educ);
temp2 = data(row_ind,col_long.tau1);
temp = nancov(temp1,temp2,1);
moments_educ(6,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 3) dad_educ, LW
temp1 = data(row_ind,col_long.dad_educ);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_educ(7,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 4) dad_educ, tau2
temp1 = data(row_ind,col_long.dad_educ);
temp2 = data(row_ind,col_long.tau2);
temp = nancov(temp1,temp2,1);
moments_educ(8,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% Finally, only for ages 13-16:
row_ind = (data(:,col_long.age) >= 13 & data(:,col_long.age) <= 16);

% 1) mom_educ, LW
temp1 = data(row_ind,col_long.mom_educ);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_educ(9,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 2) mom_educ, tau1
temp1 = data(row_ind,col_long.mom_educ);
temp2 = data(row_ind,col_long.tau1);
temp = nancov(temp1,temp2,1);
moments_educ(10,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 3) dad_educ, LW
temp1 = data(row_ind,col_long.dad_educ);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_educ(11,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));
% 4) dad_educ, tau2
temp1 = data(row_ind,col_long.dad_educ);
temp2 = data(row_ind,col_long.tau2);
temp = nancov(temp1,temp2,1);
moments_educ(12,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

mom.moments_educ = moments_educ;
mom.index_moments_educ = length(moments_all) + [1:length(moments_educ)];
moments_all = [moments_all; moments_educ];

%% 5) Moments on CCT use based on 2002 and 2007 CDS allowance data

% Define CCT based moments (only for 2002-2007 where we observe CCT
% use in the data, and where all kids are at least 8 years old)

moments_CCT = NaN(7,1);
row_ind = (data(:,col_long.age) >= 8 & data(:,col_long.age) <= 16);

% 1) E(CCT) unconditionally
moments_CCT(1,1) = nanmean(data(row_ind,col_long.CCT));

% 2) corr between CCT and child age
temp1 = data(row_ind,col_long.CCT);
temp2 = data(row_ind,col_long.age);
temp = nancov(temp1,temp2,1);
moments_CCT(2,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% 3) corr between CCT and mom educ, all ages
temp1 = data(row_ind,col_long.CCT);
temp2 = data(row_ind,col_long.mom_educ);
temp = nancov(temp1,temp2,1);
moments_CCT(3,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% 4) corr between CCT and dad educ, all ages
temp1 = data(row_ind,col_long.CCT);
temp2 = data(row_ind,col_long.dad_educ);
temp = nancov(temp1,temp2,1);
moments_CCT(4,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% 5) corr between CCT and mom investment time, all ages
temp1 = data(row_ind,col_long.CCT);
temp2 = data(row_ind,col_long.tau1);
temp = nancov(temp1,temp2,1);
moments_CCT(5,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% 6) corr between CCT and dad investment time, all ages
temp1 = data(row_ind,col_long.CCT);
temp2 = data(row_ind,col_long.tau2);
temp = nancov(temp1,temp2,1);
moments_CCT(6,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

% 7) corr between CCT and same-period test score, young
temp1 = data(row_ind,col_long.CCT);
temp2 = data(row_ind,col_long.lw_raw);
temp = nancov(temp1,temp2,1);
moments_CCT(7,1) = temp(1,2)...
    ./(nanstd(temp1(isnan(temp1) == 0 & isnan(temp2) == 0),1) ...
    .* nanstd(temp2(isnan(temp1) == 0 & isnan(temp2) == 0),1));

mom.moments_CCT = moments_CCT;
mom.index_moments_CCT = length(moments_all) + [1:length(moments_CCT)];
moments_all = [moments_all; moments_CCT];

mom.moments_varying = moments_all; % moments with variation

%% 6) External moments: child expenditures (from CRC report 2015) and child discount factors (from Steinberg data)
% See Table 1, p24 in CRC 2015 report on child expenditures by household income
% Fraction of (e+x)/Y for
% - income < 59200 in 2015 $ = let's say $1150/wk = 27%
% - income > 59200 and < 107400 in 2015 $ = let's say > $1150/wk and < $2100/wk = 16%
% - income > 107400 in 2015 $ = let's say $2100/wk = 11%

moments_external = NaN*ones(10,1); % 2 on expenses + later add on 8 beta_c moments

if dummy_moments_data == 1
    % This means we're calculating (or loading) actual data moments
    
    % 1) Calculate fraction of (e+x)/Y by child age and parental income,
    % see 2015 CRC file
    % We target mean weekly expenditures and fraction of total household income
    moments_external(1,1) = 250; % average of sum of (e+x)
    moments_external(2,1) = 0.21; % average fraction (e+x)/Y
    
    tmp_ind = 2;
    
    % 2) Calculate average child discount factor by age and parental education (annualized, correced for risk aversion and present bias)
    % Calculated in data_load_steinberg.m - and copied here, See also paper Table 12
    moments_external(tmp_ind+1,1) = 0.6797; % child ages 10-12, all hh
    moments_external(tmp_ind+2,1) = 0.6201; % child ages 10-12, less than HS
    moments_external(tmp_ind+3,1) = 0.6946; % child ages 10-12, (some) college
    moments_external(tmp_ind+4,1) = 0.7969; % child ages 10-12, graduate
    
    moments_external(tmp_ind+5,1) = 0.7103; % child ages 13-17, all hh
    moments_external(tmp_ind+6,1) = 0.6761; % child ages 13-17, less than HS
    moments_external(tmp_ind+7,1) = 0.7204; % child ages 13-17, (some) college
    moments_external(tmp_ind+8,1) = 0.7974; % child ages 13-17, graduate
    
elseif dummy_moments_data == 0
    % This means we're calculating moments of model-simulated data
    
    % Select only households with "middle income" approximately in the range of
    % 59200 and 107400 per year as per the CRC report (which is in 2015 prices; our
    % model is in 2007 prices)
    income_lims = [1000 2500];
    row_ind = (data(:,col_long.age) >= 3 & data(:,col_long.age) <= 16 & data(:,col_long.Y) > income_lims(1) & data(:,col_long.Y) <= income_lims(2));
    tmp_e = data(row_ind,col_long.e);
    tmp_x = data(row_ind,col_long.x);
    tmp_Y = data(row_ind,col_long.Y);
    moments_external(1,1) = nanmean(tmp_x+tmp_e);
    moments_external(2,1) = nanmean((tmp_x+tmp_e)./tmp_Y);
    
    tmp_ind = 2; % total number of moments defined so far
    
    % define child age categories
    beg_age = [10 13];
    end_age = [12 17];
    % Define education categories: all / high school or less / some college or college / graduate
    beg_educ = [1 1 13 17];
    end_educ = [17 12 16 17];

    tmp_table = NaN(length(beg_educ),length(beg_age)); % 2 by 4 matrix: columns = 2 age categories, rows = 4 education categories
    
    for ii = 1:length(beg_age)
        for jj = 1:length(beg_educ)
            % select the rows we need for this age group
            row_ind = (data(:,col_long.age) >= beg_age(ii) & data(:,col_long.age) <= end_age(ii) & data(:,col_long.dad_educ) >= beg_educ(jj) & data(:,col_long.dad_educ) <= end_educ(jj));
            tmp_n = data(row_ind,col_long.n); % child's patience capital at the start of the period
            tmp_beta = tmp_n./(1+tmp_n); % transform into discount factors
            tmp_table(jj,ii) = nanmean(tmp_beta);
        end
    end
    moments_external(tmp_ind+1:end) = tmp_table(:); %vectorized
end

mom.moments_external = moments_external;
mom.index_moments_external = length(moments_all) + [1:length(moments_external)];
moments_all = [moments_all; moments_external];

%% Now save all the moments
mom.moments_all = moments_all;